import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
# Load the methane-emissions CSV into `df`, then print its schema
# (column names, non-null counts and dtypes).
csv_path = 'C:/Users/AmlaHardk/Desktop/Methane_final.csv'
df = pd.read_csv(csv_path)
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1548 entries, 0 to 1547 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Unnamed: 0 1548 non-null int64 1 region 1548 non-null object 2 country 1548 non-null object 3 emissions 1548 non-null float64 4 type 1548 non-null object 5 segment 1548 non-null object 6 reason 1548 non-null object 7 baseYear 1548 non-null object 8 notes 1548 non-null object dtypes: float64(1), int64(1), object(7) memory usage: 109.0+ KB
# Remove the leftover CSV index column and the free-text notes column.
# DataFrame.drop(..., inplace=True) returns None, so assigning its result
# back to `df` replaced the frame with None and broke every later cell.
# Use the copying form (no inplace) and rebind the returned frame instead.
df = df.drop(columns=['Unnamed: 0', 'notes'])
df.head()
| Unnamed: 0 | region | country | emissions | type | segment | reason | baseYear | notes | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | Africa | Algeria | 257.611206 | Agriculture | Total | All | 2019-2021 | Average based on United Nations Framework Conv... |
| 1 | 1 | Africa | Algeria | 0.052000 | Energy | Bioenergy | All | 2022 | Estimates from end-uses are for 2020 or 2021 (... |
| 2 | 2 | Africa | Algeria | 130.798996 | Energy | Gas pipelines and LNG facilities | Fugitive | 2022 | Not available |
| 3 | 3 | Africa | Algeria | 69.741898 | Energy | Gas pipelines and LNG facilities | Vented | 2022 | Not available |
| 4 | 4 | Africa | Algeria | 213.987000 | Energy | Onshore gas | Fugitive | 2022 | Not available |
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns of df.
df.describe()
| Unnamed: 0 | emissions | |
|---|---|---|
| count | 1548.000000 | 1548.000000 |
| mean | 773.500000 | 643.255972 |
| std | 447.013423 | 5566.238201 |
| min | 0.000000 | 0.000459 |
| 25% | 386.750000 | 2.659361 |
| 50% | 773.500000 | 24.064669 |
| 75% | 1160.250000 | 128.419594 |
| max | 1547.000000 | 141953.765625 |
# Number of missing values in each column of df.
df.isna().sum()
Unnamed: 0 0 region 0 country 0 emissions 0 type 0 segment 0 reason 0 baseYear 0 notes 0 dtype: int64
# Keep only rows whose region is not 'World' and whose segment is not 'Total'.
# NOTE(review): presumably both are aggregate rows that would double-count
# the detailed rows -- confirm against the dataset description.
temp = df
temp = temp[(temp['region'] != 'World') & (temp['segment'] != 'Total')]

# `region` was passed to barplot without ever being defined (NameError).
# Build it as the summed emissions per region, mirroring how `seg` and
# `cntry` are aggregated elsewhere in this notebook.
region = temp.groupby('region')[['emissions']].sum().reset_index()

sns.barplot(x='region', y='emissions', data=region)
plt.xticks(rotation=90)  # region names are long; rotate so they do not overlap
plt.title('total emissions in every region')
plt.show()
# Sum emissions per segment and keep the first 11 segments in the
# group-key order produced by groupby, then draw them as a bar chart.
seg = (
    df.groupby('segment')[['emissions']]
    .sum()
    .head(11)
    .reset_index()
)
segment_ax = sns.barplot(data=seg, x='segment', y='emissions')
segment_ax.set_title('emissions by their segment')
plt.xticks(rotation=90)
plt.show()
# Total emissions per country, largest first. The top-ranked row is skipped
# and the next ten are kept.
# NOTE(review): the skipped row is presumably the 'World' aggregate -- confirm.
cntry = (
    df.groupby('country')[['emissions']]
    .sum()
    .reset_index()
    .sort_values(by='emissions', ascending=False)
    .iloc[1:11]
)
cntry
| country | emissions | |
|---|---|---|
| 16 | China | 81048.371586 |
| 98 | United States | 48604.877296 |
| 77 | Russia | 42432.929804 |
| 38 | India | 34852.007386 |
| 10 | Brazil | 21720.838126 |
| 39 | Indonesia | 19404.469000 |
| 29 | European Union | 18985.173461 |
| 65 | Other | 15997.357175 |
| 40 | Iran | 13030.685366 |
| 62 | Nigeria | 9903.895667 |
# Bar chart of the country totals held in `cntry`, on a wide figure so the
# country names fit along the x axis.
plt.figure(figsize=(15, 6))
top_countries_ax = sns.barplot(data=cntry, x='country', y='emissions')
top_countries_ax.set_title('countries with highest emission')
plt.show()
# Show the first five rows of df again before building the remaining charts.
df.head()
| Unnamed: 0 | region | country | emissions | type | segment | reason | baseYear | notes | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | Africa | Algeria | 257.611206 | Agriculture | Total | All | 2019-2021 | Average based on United Nations Framework Conv... |
| 1 | 1 | Africa | Algeria | 0.052000 | Energy | Bioenergy | All | 2022 | Estimates from end-uses are for 2020 or 2021 (... |
| 2 | 2 | Africa | Algeria | 130.798996 | Energy | Gas pipelines and LNG facilities | Fugitive | 2022 | Not available |
| 3 | 3 | Africa | Algeria | 69.741898 | Energy | Gas pipelines and LNG facilities | Vented | 2022 | Not available |
| 4 | 4 | Africa | Algeria | 213.987000 | Energy | Onshore gas | Fugitive | 2022 | Not available |
# Emissions summed per baseYear label over every row whose country is not 'World'.
data = df
year_emissions = (
    data.loc[data['country'].ne('World')]
    .groupby('baseYear')['emissions']
    .sum()
)
# Sunburst of emissions: inner ring is region, outer ring is segment,
# wedges coloured by region. Uses the filtered frame `temp`.
px.sunburst(
    temp,
    path=['region', 'segment'],
    values='emissions',
    color='region',
    width=700,
    height=700,
)
# Donut chart of emissions split by `type`.
# `temp2` was used here without ever being defined (NameError), so build it
# in this cell: drop the 'World' region, then for each (country, type) keep
# the 'Total' row when one exists and the individual segment rows otherwise,
# so that no type is counted twice and none is dropped.
# NOTE(review): assumes a 'Total' row equals the sum of that country/type's
# other segments -- confirm against the dataset description.
temp2 = df[df['region'] != 'World']
is_total = temp2['segment'] == 'Total'
has_total = is_total.groupby([temp2['country'], temp2['type']]).transform('any')
temp2 = temp2[is_total | ~has_total]
px.pie(temp2, values='emissions', names='type', hole=0.5)
# Total emissions per baseYear label, leaving out rows whose country is 'World'.
# baseYear is a string column, so labels such as '2019-2021' form their own group.
data = df
year_emissions = (
    data.loc[data['country'].ne('World')]
    .groupby('baseYear')['emissions']
    .sum()
)

# Keep the ten largest totals, biggest first.
top10_year_emissions = year_emissions.sort_values(ascending=False).iloc[:10]

# One colour per bar, taken from the Paired colormap.
bar_colors = plt.cm.Paired(np.arange(len(top10_year_emissions)))

plt.figure(figsize=(12, 6))
plt.bar(top10_year_emissions.index, top10_year_emissions.values, color=bar_colors)
plt.title('Top 10 Years by Emissions (excluding World)', fontsize=16)
plt.xlabel('Year', fontsize=12)
plt.ylabel('Emissions', fontsize=12)
plt.xticks(rotation=45)
plt.show()